

* Working directory for the whole script: the data\ and latex_text\ paths used below are relative to it.
cd "D:\Dropbox\book_welfare\replication"


**********************************
* female author shares and growth - Bookstat
**********************************



* Counts by publication year and genre (sums of N_male, N_name and total),
* stored in a tempfile so they can be attached to the sales data.
use data\bookstat_gender_pyear_genre_asin.dta, clear
collapse (sum) males=N_male name_present=N_name total, by(pyear genre)
tempfile number
save `number'

* Quantities by publication year, year and genre, with the counts merged on.
use data\bookstat_sales_gender_pyear_genre_asin.dta, clear
collapse (sum) q qm=q_male qname=q_name, by(pyear year genre)
merge m:1 pyear genre using `number'

* Restrict to publication years 1960-2021.
keep if inrange(pyear, 1960, 2021)

* Share of (named minus male) counts: relative to all counts, and relative
* to counts with a name present.
generate rv_all = (name_present - males) / total
generate rv_in  = (name_present - males) / name_present

* Genres whose summed q falls below 2.5e+07 are pooled into "other".
egen Q = total(q), by(genre)
generate genre1 = genre
replace genre1 = "other" if Q < 2.5e+07
	
		
preserve 
	* Per-genre lookup of the 2021 shares, keyed by gno, saved for merging
	* onto the regression coefficients after parmest.
	* gno has to be built from genre1, the same variable the regression
	* factor i.gno is built from. Building it from genre gives a different
	* numbering as soon as any genre is pooled into "other", and the later
	* merge on gno would then attach the shares to the wrong coefficients.
	egen gno=group(genre1)

	* Zero outside 2021, so the max within a genre picks up the 2021 value.
	gen RV_ALL=rv_all*(pyear==2021)
	gen RV_IN=rv_in*(pyear==2021)

	* NOTE(review): for the pooled "other" category the max runs over all
	* pooled genres, i.e. it is the largest 2021 share among them - confirm
	* this is acceptable or compute a pooled share instead.
	collapse (max) RV_*, by(gno genre1)

	* Keep the variable name used in the saved lookup so far.
	rename genre1 genre
	tempfile genre 
	save `genre'
restore 

* Numeric index of the pooled genre variable; enters the regressions as i.gno.
egen gno = group(genre1)

* Log shares used as regression outcomes.
generate lrv_in  = ln(rv_in)
generate lrv_all = ln(rv_all)

	* egen gno=group(genre)
	
	
* Share among all counts (lrv_all): genre-specific intercepts and
* genre-specific linear trends in publication year.
regress lrv_all i.gno#c.pyear i.gno

preserve

	* Replace the data in memory with one row per estimated parameter.
	parmest, norestore

	* Recover the genre index from the slope parameter names; parameters
	* that are not gno-by-pyear slopes yield a missing gno and are dropped.
	split parm, parse(".gno#c." "b.gno#c.")
	generate gno = real(parm1)
	drop if gno == .

	merge 1:1 gno using `genre'

	* Rank genres from largest to smallest slope, then drop rows without an estimate.
	gsort -estimate
	generate order = _n
	keep if estimate != .

	*(scatter RV_ALL order, yaxis(2) msymbol(x))
	* ytitle("female 2021 author share (x)", axis(2))

	* The x-axis labels are typed in by rank and are not derived from the data.
	twoway (scatter estimate order) (rcap max95 min95 order), ///
		scheme(lean2) legend(off) ///
		xlabel( ///
			1  "Children's Books" ///
			2  "Education & Teaching" ///
			3  "Mystery, Thriller & Suspense" ///
			4  "Books on CD" ///
			5  "Libros en español" ///
			6  "Computers & Technology" ///
			7  "Deals in Books" ///
			8  "Foreign Languages" ///
			9  "Medical Books" ///
			10 "Calendars" ///
			11 "Law" ///
			12 "Engineering & Transportation" ///
			13 "Humor & Entertainment" ///
			14 "Lesbian, Gay, Bisexual & Transgender" ///
			15 "History" ///
			16 "Health, Fitness & Dieting" ///
			17 "Arts & Photography" ///
			18 "Kindle Short Reads" ///
			19 "Cookbooks, Food & Wine" ///
			20 "Crafts, Hobbies & Home" ///
			21 "Biographies & Memoirs" ///
			22 "Comics & Graphic Novels" ///
			23 "Literature & Fiction" ///
			24 "Kindle eBooks" ///
			25 "Business & Money" ///
			26 "Christian Books & Bibles" ///
			, labsize(vsmall) angle(forty_five) noticks) ///
		xtitle("") ytitle("annual female author share growth")

	graph export latex_text\figures\female_author_share_growth_all_asin.pdf, as(pdf) name("Graph") replace

restore


* Share among counts with a name present (lrv_in): genre-specific intercepts
* and genre-specific linear trends in publication year.
reg lrv_in i.gno#c.pyear i.gno 

preserve 

	* Replace the data in memory with one row per estimated parameter.
	parmest, norestore 

	* Recover the genre index from the slope parameter names; parameters
	* that are not gno-by-pyear slopes yield a missing gno and are dropped.
	split parm, parse(".gno#c." "b.gno#c.")
	gen gno=real(parm1)
	drop if gno==. 

	* Attach the 2021 shares (RV_IN is plotted on the second y axis).
	merge 1:1 gno using `genre'

	* Rank genres from largest to smallest slope, then drop rows without an estimate.
	gsort -estimate 
	gen order=_n 

	keep if estimate~=.

	* The x-axis labels are typed in by rank and are not derived from the data.
	* Each position is listed exactly once: a repeated position would add a
	* second, bare numeric label at the same tick.
	twoway (scatter estimate order) (rcap max95 min95 order)  (scatter RV_IN order, yaxis(2) msymbol(x)), scheme(lean2) legend(off) ///
	xlabel( 1	"Children's Books" ///
	2	"Education & Teaching" ///
	3	"Mystery, Thriller & Suspense" ///
	4	"Computers & Technology" ///
	5	"Books on CD" ///
	6	"Libros en español" ///
	7	"Deals in Books" ///
	8	"Medical Books" ///
	9	"Foreign Languages" ///
	10	"Lesbian, Gay, Bisexual & Transgender" ///
	11	"Health, Fitness & Dieting" ///
	12	"Calendars" ///
	13	"Engineering & Transportation" ///
	14	"Arts & Photography" ///
	15	"History" ///
	16	"Humor & Entertainment" ///
	17	"Kindle Short Reads" ///
	18	"Cookbooks, Food & Wine" ///
	19	"Law" ///
	20	"Crafts, Hobbies & Home" ///
	21	"Literature & Fiction" ///
	22	"Biographies & Memoirs" ///
	23	"Comics & Graphic Novels" ///
	24	"Kindle eBooks" ///
	25	"Business & Money" ///
	26	"Christian Books & Bibles" , labsize(vsmall) angle(forty_five) noticks) xtitle("") ytitle("annual female author share growth") ytitle("female 2021 author share (x)", axis(2)) t1(among identified)

	graph export latex_text\figures\female_author_share_growth_in_asin.pdf, as(pdf) name("Graph") replace 

restore 




***********************************************
* female author share growth - Goodreads 
***********************************************




* Goodreads: share of fbookp category 1 by publication year and genre, and
* the genre-specific log-linear trend in that share.
use data\books_year.dta, clear  

		keep if year>=2007 & year<=2016
		keep if pubyr >=1960 & pubyr<=2016 

		* A missing fbookp becomes its own category 2, so those rows stay in the denominator.
		replace fbookp=2 if fbookp==. 

		* Count of non-missing q per publication year x genre x fbookp
		* category, then one count column per category (n0, n1, n2).
		collapse (count) n=q , by(pubyr fbookp genre1 )
		reshape wide n, i(pubyr genre1) j(fbookp)

		* reshape leaves a count missing when a category has no rows in a
		* genre-year. Adding the counts directly would make the share missing
		* and silently drop that genre-year from the regression. rowtotal()
		* treats missing counts as zero, so the denominator stays defined.
		egen ntot = rowtotal(n0 n1 n2)
		gen nf_all = n1/ntot
		gen lnf_all = ln(nf_all)
		egen gno = group(genre1)

		* Genre-specific intercepts and genre-specific linear trends in publication year.
		reg lnf_all i.gno#c.pubyr i.gno 
		



preserve

	* Replace the data in memory with one row per estimated parameter.
	parmest, norestore

	* Recover the genre index from the slope parameter names; parameters
	* that are not gno-by-pubyr slopes yield a missing gno and are dropped.
	split parm, parse(".gno#c." "b.gno#c.")
	generate gno = real(parm1)
	drop if gno == .

	* Rank genres from largest to smallest slope, then drop rows without an estimate.
	gsort -estimate
	generate order = _n
	keep if estimate != .

	* Genre names typed in by gno value (1-11), in the order listed.
	generate genre = ""
	local k = 0
	foreach g in "children" "comics" "fantasy" "fiction" "history" "missing" "mystery" "non-fiction" "poetry" "romance" "young adult" {
		local ++k
		replace genre = "`g'" if gno == `k'
	}

	* The x-axis labels are typed in by rank and are not derived from the genre variable.
	twoway (scatter estimate order) (rcap max95 min95 order), ///
		scheme(lean2) ///
		xlabel( ///
			1  "History" ///
			2  "Poetry" ///
			3  "Missing" ///
			4  "Fiction" ///
			5  "Non-fiction" ///
			6  "Comics" ///
			7  "Fantasy" ///
			8  "Children" ///
			9  "Mystery" ///
			10 "Young adult" ///
			11 "Romance" ///
			, labsize(small) angle(forty_five) noticks) ///
		xtitle("") legend(off) ytitle("annual female author share growth")

	graph export latex_text\figures\female_author_share_growth_gr_all.pdf, as(pdf) name("Graph") replace

restore
